# calculate the probability of each intermediate reaching the interface TFs, and their effect
# use surprisal as edge weight, use most probable path to define P(intermediate-interfaceTF)
# modify edge weights according to the correlation of gene expression

# signalling network
library(igraph)
graph_str = readRDS("../data/curated_graph.Rdata")

# arguments
args = commandArgs(TRUE)
cutoff=args[1]

# interface TFs
interfaceTFs = read.table(sprintf("interfaceTFs-cutoff%s.txt",cutoff),sep="\t",quote="",stringsAsFactors=FALSE)[,1]

## 1. calculate edge weights and signs
pexpr = read.table("weights_MetacoreObjects.txt",stringsAsFactors=FALSE,row.names=1,sep="\t",quote="")

# assign weights to each edge as surprisal: log(1/p(expr target node))
weights = sapply(E(graph_str),function(x){
	down = head_of(graph_str,x)
	log(1/pexpr[down,2]) # surprisal
})
#assign signs to the edges
signs = ifelse(E(graph_str)$V5=="Activation",1,-1)

## get Pearson correlation among gene expression values across all CMap datasets (only corr>0.7)
correlation = read.table("../data/correlation_edges_0p7-CURATED.txt",sep="\t",stringsAsFactors=FALSE,quote="",header=TRUE) 

right = which((signs<0 & correlation$C<0) | (signs>0 & correlation$C>0)) # find which edges have correlation coherent with sign
weights[right] = weights[right]/2 #reduce their weight = increase their probability

# 2. calculate probabilities
library(foreach)
library(doParallel)
numWorkers = min(c(detectCores() - 1,119))
registerDoParallel(numWorkers)

# for each intermediate-interfaceTF couple find most probable paths (min sum of surprisals)
E_paths = foreach(iTF=interfaceTFs, .combine=list, .inorder=FALSE, .multicombine=TRUE,.maxcombine = length(interfaceTFs), .packages="igraph") %dopar% {
	# find "shortest" path from an interface TF to all signalling molecules
	x = shortest_paths(graph_str,iTF,V(graph_str),mode="in",weights=weights,output="epath")
	edges = x$epath
	edges = edges[lengths(edges)>0]
	# for each of the signalling molecules reached:
	P = lapply(edges, function(y){
		p = sum(weights[as.integer(y)]) * prod(signs[as.integer(y)]) # sign*surprisal
		extr = c(as.character(tail_of(graph_str,y[length(y)])), as.character(head_of(graph_str,y[1]))) # extract extremes of path
		c(extr,p)
	})
	return(do.call(rbind,P))
}
M = do.call(rbind,E_paths)

# turn into a table
X = matrix(Inf,nrow=length(interfaceTFs),ncol=vcount(graph_str),dimnames=list(interfaceTFs,V(graph_str)$name))
for (i in 1:nrow(M)) {
	X[as.character(V(graph_str)[as.integer(M[i,2])]$name),as.integer(M[i,1])] = M[i,3]
}
X = apply(X,2,as.double)

# transform from surprisals to probability
surpPs = 1/exp(abs(X))
surpPs[which(sign(X)== -1)] = -surpPs[which(sign(X)== -1)]

# prepare probability of activation and inhibition of each signalling molecule
activateP = surpPs
activateP[activateP<0] = 0
inhibitP = surpPs
inhibitP[inhibitP>0] = 0
inhibitP = abs(inhibitP)
rownames(activateP) = paste0(interfaceTFs,"_1")
rownames(inhibitP) = paste0(interfaceTFs,"_0")
calcPs = rbind(activateP,inhibitP)
calcPs = calcPs[,colSums(calcPs)>0] # remove intermediates that don't reach anything
calcPs = apply(calcPs, 2,function(x) x/sum(x)) # normalize so that for each signalling molecule the probabilities sum to 1

write.table(calcPs,sprintf("intermediateToTFsP-correlation-cutoff%s.txt",cutoff),sep="\t",quote=FALSE)
